#### Produce the master tables comparing OLS and Bayesian models

## Setup environment

setwd("~/research/coffee-dataverse/src")

do.origspec <- T # For determining priors
source("load.R", encoding="iso-8859-1")

library(rstan)
library(lfe)
library(splines)

## Run OLS regression

mod.base <- felm(as.formula(paste("logyield ~", mainspec.rhs, "| 0 | Munic�pio + year")), data=df)

## Run regression for each municipality

allreg <- data.frame()
for (reg in unique(df$Munic�pio)) {
    print(reg)
    rows <- which(df$Munic�pio == reg)
    if (length(rows) <= 16)
        next
    mod <- tryCatch({
        lm(logyield ~ tmin + gdd1000 + edd1000 + prcp + prcp2 + year, data=df[rows,])
    }, error=function(e) {
        NULL
    })
    if (!is.null(mod) && length(rows) - length(mod$na.action) > 16) {
        stderr <- sqrt(diag(vcov(mod)))
        allreg <- rbind(allreg, data.frame(region=reg, const=mod$coeff[1], tmin=mod$coeff[2], tmin.se=stderr[2], gdd1000=mod$coeff[3],
                                           gdd1000.se=stderr[3], edd1000=mod$coeff[4], edd1000.se=stderr[4], prcp=mod$coeff[5],
                                           prcp.se=stderr[5], prcp2=mod$coeff[6], prcp2.se=stderr[7], trend=mod$coeff[7],
                                           trend.se=stderr[7]))
    }
}

quantile(allreg$edd1000, na.rm=T)
colMeans(allreg[abs(allreg$edd1000) < 100, -1], na.rm=T)

valid <- !is.na(allreg$edd1000) & abs(allreg$edd1000) < 100 # 1 EDD -> negative log yields
valreg <- allreg[valid,]

## Summarize municipality-level regressions

regmus <- c(sum(valreg$tmin / valreg$tmin.se^2, na.rm=T) / sum(1 / valreg$tmin.se^2, na.rm=T),
            sum(valreg$gdd1000 / valreg$gdd1000.se^2, na.rm=T) / sum(1 / valreg$gdd1000.se^2, na.rm=T),
            sum(valreg$edd1000 / valreg$edd1000.se^2, na.rm=T) / sum(1 / valreg$edd1000.se^2, na.rm=T),
            sum(valreg$prcp / valreg$prcp.se^2, na.rm=T) / sum(1 / valreg$prcp.se^2, na.rm=T),
            sum(valreg$prcp2 / valreg$prcp2.se^2, na.rm=T) / sum(1 / valreg$prcp2.se^2, na.rm=T),
            sum(valreg$trend / valreg$trend.se^2, na.rm=T) / sum(1 / valreg$trend.se^2, na.rm=T))
regses <- sqrt(c(1 / sum(1 / valreg$tmin.se^2, na.rm=T),
                 1 / sum(1 / valreg$gdd1000.se^2, na.rm=T),
                 1 / sum(1 / valreg$edd1000.se^2, na.rm=T),
                 1 / sum(1 / valreg$prcp.se^2, na.rm=T),
                 1 / sum(1 / valreg$prcp2.se^2, na.rm=T),
                 1 / sum(1 / valreg$trend.se^2, na.rm=T)))

## Build up results dataframe

results <- data.frame(model='OLS', param=weatherpreds, mu=mod.base$coeff[1:length(weatherpreds)], se=mod.base$cse[1:length(weatherpreds)],
                      lo95=mod.base$coeff[1:length(weatherpreds)] - 1.96 * mod.base$cse[1:length(weatherpreds)],
                      hi95=mod.base$coeff[1:length(weatherpreds)] + 1.96 * mod.base$cse[1:length(weatherpreds)],
                      sdev=0)
results <- rbind(results, data.frame(model='OLS x reg.', param=c(weatherpreds, 'scale_trend'),
                                     mu=regmus, se=regses, lo95=regmus - 1.96 * regses, hi95=regmus + 1.96 * regses,
                                     sdev=as.numeric(apply(valreg, 2, sd))[c(2, 4, 6, 8, 10, 12)]))

## Load all Bayesian model calibrations

output.suffixes <- c("-ols-orig",  "-nos-orig", "-nosrational-orig", "-eq24rational-orig", "-eq24-orig")
for (output.suffix in output.suffixes) {
    param <- get.fit.params(output.suffix)
    param[grep("yield_coeff", param)] <- weatherpreds

    load(paste0("../data/combined-betagamma", output.suffix, ".RData"))

    for (pp in 1:length(param)) {
        if (param[pp] == 'pricememory') {
            name <- 'autoreg_price'
            values <- 1 - la$gamma[, 1, pp]
            betas <- la$beta[,, pp]
        } else if (param[pp] == 'autoreg') {
            name <- 'autoreg_planting'
            values <- la$gamma[, 1, pp]
            betas <- la$beta[,, pp]
        } else {
            name <- param[pp]
            values <- la$gamma[, 1, pp]
            betas <- la$beta[,, pp]
        }

        mu <- mean(values)
        se <- sd(values)
        lo95 <- quantile(values, .025)
        hi95 <- quantile(values, .975)
        sdev <- sd(colMeans(betas))
        results <- rbind(results, data.frame(model=output.suffix, param=name, mu, se, lo95, hi95, sdev))
    }
}

## Standaridize display of results

labels <- list('death_coeff1'="Die-off EDD effect", 'autoreg_planting'="Planting autoreg.", 'autoreg_price'="Nerlove autoreg.", 'cost_harvest'="Cost of harvest", 'cost_to_planting'="Planting price resp.", 'scale_trend'="Exogenous trend", 'yield_sigma'="Yield std. dev.", 'yield_uncertainty'="Range of yields", 'bearing_sigma'="Bearing std. dev.", 'harvest_sigma'="Harvest std. dev.", 'production_sigma'="Production std. dev.")
symbols <- list('death_coeff1'='\\delta', 'autoreg_planting'='\\rho_a', 'autoreg_price'='\\rho_p', 'cost_harvest'='c', 'cost_to_planting'='\\phi', 'scale_trend'='\\theta', 'yield_sigma'='\\sigma_y', 'yield_uncertainty'='\\Delta', 'bearing_sigma'='\\sigma_b', 'harvest_sigma'='\\sigma_h', 'production_sigma'='\\sigma_q')
for (kk in 1:length(weatherpreds))
    symbols[[weatherpreds[kk]]] <- weathersymbols[kk]

prep.results <- function(results) {
    results$citxt <- paste0("[", format(results$lo95, digits=2, nsmall=1), " - ", format(results$hi95, digits=2, nsmall=1), "]")

    results$param <- factor(results$param, levels=c(weatherpreds, 'death_coeff1', 'autoreg_planting', 'autoreg_price', 'cost_harvest', 'cost_to_planting', 'scale_trend', 'yield_sigma', 'yield_uncertainty', 'bearing_sigma', 'harvest_sigma', 'production_sigma'))

    results$label <- NA
    results$symbol <- NA
    for (ii in 1:nrow(results)) {
        if (results$param[ii] %in% weatherpreds)
            results$label[ii] <- weatherlabels[weatherpreds == results$param[ii]]
        else
            results$label[ii] <- labels[[as.character(results$param[ii])]]
        results$symbol[ii] <- symbols[[as.character(results$param[ii])]]
    }

    results
}

results <- prep.results(results)

library(reshape2)

## Prepare comparison table
results2 <- results

modelnames <- list('OLS'="OLS", 'OLS x reg.'='OLS x Muni', '-ols-orig'="Bayes. Reg.", '-nos-orig'="No Sel. (2)", "-eq24rational-orig"="Rational", "-nosrational-orig"="No Sel. (1)", '-eq24-orig'="Adaptive")
results2$modelname <- NA
for (ii in 1:nrow(results2))
    results2$modelname[ii] <- modelnames[[results2$model[ii]]]
results2$modelname <- factor(results2$modelname, sapply(names(modelnames), function(model) modelnames[[model]]))
results2$label <- factor(results2$label, levels=sapply(levels(results$param),
                                                       function(par) ifelse(par %in% weatherpreds, weatherlabels[weatherpreds == par], labels[[par]])))

results3 <- data.frame(modelname=rep(results2$modelname, 3), label=rep(results2$label, 3), symbol=rep(results2$symbol, 3),
                       values=c(format(results2$mu, digits=1, scientific=F),
                                paste0("(", format(results2$se, digits=1, scientific=F), ")"),
                                paste0("(", format(results2$sdev, digits=1, scientific=F), ")")),
                       group=rep(c('mu', 'se', 'sd'), each=nrow(results2)))
results3$group <- factor(results3$group, levels=c('mu', 'se', 'sd'))
results3$values <- gsub("( ", "(", gsub("( ", "(", results3$values, fixed=T), fixed=T)

results4 <- dcast(results3, label + symbol + group ~ modelname, value.var='values')
results4$label <- as.character(results4$label)
results4$label[seq(2, nrow(results4), by=3)] <- "\\hspace{1em}(S.E.)"
results4$label[seq(3, nrow(results4), by=3)] <- "\\hspace{1em}(Muni. SD)"
results4$label <- gsub("\\^2", "$^2$", results4$label)
results4$symbol <- as.character(results4$symbol)
results4$symbol <- paste("$", results4$symbol, "$")
results4$symbol[-seq(1, nrow(results4), by=3)] <- ""
names(results4)[1:2] <- c('Description', '')
results4 <- results4[, -3]

## Print table

library(xtable)

## All results
print(xtable(results4[, c(1:5, 8, 7, 9, 6)]), include.rownames=F, sanitize.text.function=function(x) x)

## Point estimates only
print(xtable(results4[seq(1, nrow(results4), by=3), c(1:5, 8, 7, 9, 6)]), include.rownames=F, sanitize.text.function=function(x) x)

## Report eq24-orig model with regional SD
subres <- subset(results, model == '-eq24-orig')
subres <- subres[order(subres$param),]

subres2 <- subres[, c('label', 'symbol', 'mu', 'citxt', 'sdev')]
names(subres2) <- c('Description', 'Param.', 'Estimate', 'Credible Interval', 'Regional SD')

library(xtable)

print(xtable(subres2), include.rownames=F, sanitize.text.function=function(x) x)

## Arabica vs. Robusta

## Setup environment

setwd("~/research/coffee-dataverse/src")
library(rstan)

do.origspec <- T # For determining priors
do.configonly <- T
source("load.R", encoding="iso-8859-1")

output.suffix.master <- "-eq24-orig" #"-eq24rational-orig"

## Load results
param <- get.fit.params(output.suffix.master)
param[grep("yield_coeff", param)] <- weatherpreds

results <- data.frame()
output.suffixes <- paste0(c("-arabica", "-robusta"), output.suffix.master)
for (output.suffix in output.suffixes) {
    load(paste0("../data/combined-betagamma", output.suffix, ".RData"))

    for (pp in 1:length(param)) {
        if (param[pp] == 'pricememory') {
            name <- 'autoreg_price'
            values <- 1 - la$gamma[, 1, pp]
            betas <- la$beta[,, pp]
        } else if (param[pp] == 'autoreg') {
            name <- 'autoreg_planting'
            values <- la$gamma[, 1, pp]
            betas <- la$beta[,, pp]
        } else {
            name <- param[pp]
            values <- la$gamma[, 1, pp]
            betas <- la$beta[,, pp]
        }

        mu <- mean(values)
        se <- sd(values)
        lo95 <- quantile(values, .025)
        hi95 <- quantile(values, .975)
        sdev <- sd(colMeans(betas))
        results <- rbind(results, data.frame(model=output.suffix, param=name, mu, se, lo95, hi95, sdev))
    }
}

## Prepare for display
results <- prep.results(results)

results2 <- data.frame(Description=results$label[1:(nrow(results)/2)], `Param.`=results$symbol[1:(nrow(results)/2)], Estimate=results$mu[1:(nrow(results)/2)], `Credible Interval`=results$citxt[1:(nrow(results)/2)], Estimate=results$mu[(nrow(results)/2+1):nrow(results)], `Credible Interval`=results$citxt[(nrow(results)/2+1):nrow(results)])
results2 <- results2[order(results$param[1:(nrow(results)/2)]),]

library(xtable)
print(xtable(results2), include.rownames=F, sanitize.text.function=function(x) x)

### Report results for both eq24-orig and eq24rational-orig
## NOTE: Run twice, collecting results, for -eq24-orig and -eq24rational-orig
results2.rat <- results2
results2.adt <- results2

library(dplyr)
results3 <- results2.adt %>% left_join(results2.rat, by=c('Description', 'Param.'), suffix=c('.adt', '.rat'))

print(xtable(results3[, c(1:2, 7, 3, 9, 5)]), include.rownames=F, sanitize.text.function=function(x) x)

## Add OLS regressions for regional subset

do.configonly <- F
source("load.R", encoding="iso-8859-1")

df.variety <- df %>% group_by(Munic�pio) %>% summarize(arabica.portion=mean(ifelse(is.na(arabica.produce), ifelse(is.na(robusta.produce), NA, 0), arabica.produce) / total.produce, na.rm=T), total.average=mean(total.produce, na.rm=T))

library(lfe)
library(splines)
mod.arabica <- felm(as.formula(paste("logyield ~", mainspec.rhs, "| 0 | Munic�pio + year")), data=subset(df, Munic�pio %in% df.variety$Munic�pio[df.variety$arabica.portion == 1]))
mod.robusta <- felm(as.formula(paste("logyield ~", mainspec.rhs, "| 0 | Munic�pio + year")), data=subset(df, Munic�pio %in% df.variety$Munic�pio[df.variety$arabica.portion == 0]))

results3$OLS.arabica <- c(mod.arabica$coeff[1:5], rep(NA, nrow(results3) - 5))
results3$OLS.robusta <- c(mod.robusta$coeff[1:5], rep(NA, nrow(results3) - 5))

## Print results

print(xtable(results3[, c(1:2, 11, 7, 3, 12, 9, 5)]), include.rownames=F, sanitize.text.function=function(x) x)
